{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-09-17T07:42:07.365340Z",
     "start_time": "2025-09-17T07:42:02.544129Z"
    }
   },
   "source": [
    "import os\n",
    "os.environ[\"TAVILY_API_KEY\"] = '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'\n",
    "from llama_index.tools.tavily_research import TavilyToolSpec\n",
    "tavily_tool = TavilyToolSpec(api_key='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')\n",
    "documents = tavily_tool.search(\"好笑的笑话\", max_results=20)"
   ],
   "outputs": [],
   "execution_count": 5
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:42:07.397341Z",
     "start_time": "2025-09-17T07:42:07.379341Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import asyncio\n",
    "import nest_asyncio\n",
    "nest_asyncio.apply()"
   ],
   "id": "de87027ac1f94831",
   "outputs": [],
   "execution_count": 6
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:42:08.864373Z",
     "start_time": "2025-09-17T07:42:07.422345Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from llama_index.core import Settings\n",
    "os.environ['DEEPSEEK_API_KEY']='sk-f5f3487896554276aaf657fe9b9661a7'\n",
    "from llama_index.llms.deepseek import DeepSeek\n",
    "from llama_index.core.embeddings import resolve_embed_model\n",
    "\n",
    "base_embed_model = resolve_embed_model(\"local:D:/pythonProject17/transformers/model_em/BAAI/bge-small-en-v1.5\")\n",
    "llm=DeepSeek(model='deepseek-chat')\n",
    "\n",
    "Settings.llm=llm\n",
    "Settings.embed_model=base_embed_model"
   ],
   "id": "9724fb176e66a3fc",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: D:/pythonProject17/transformers/model_em/BAAI/bge-small-en-v1.5\n",
      "Load pretrained SentenceTransformer: D:/pythonProject17/transformers/model_em/BAAI/bge-small-en-v1.5\n"
     ]
    }
   ],
   "execution_count": 7
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:42:08.896368Z",
     "start_time": "2025-09-17T07:42:08.878375Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from llama_index.finetuning import generate_qa_embedding_pairs\n",
    "from llama_index.core.evaluation import EmbeddingQAFinetuneDataset"
   ],
   "id": "3400563321b96d0c",
   "outputs": [],
   "execution_count": 8
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:42:47.558283Z",
     "start_time": "2025-09-17T07:42:44.675095Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from llama_index.core.node_parser import SentenceSplitter\n",
    "spliter=SentenceSplitter(chunk_size=512, chunk_overlap=20)\n",
    "nodes=await spliter.aget_nodes_from_documents(documents=documents,show_progress=True)"
   ],
   "id": "7892658ea939cd41",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parsing nodes:   0%|          | 0/20 [00:00<?, ?it/s]"
      ],
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "7784a18473a644eba23ce8821c2eef2b"
      }
     },
     "metadata": {},
     "output_type": "display_data",
     "jetTransient": {
      "display_id": null
     }
    }
   ],
   "execution_count": 9
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:42:57.816854Z",
     "start_time": "2025-09-17T07:42:57.804857Z"
    }
   },
   "cell_type": "code",
   "source": [
    "train_nodes=nodes[:8]\n",
    "val_nodes=nodes[8:]"
   ],
   "id": "8e0d2e3f42a63c3f",
   "outputs": [],
   "execution_count": 10
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:50:15.593686Z",
     "start_time": "2025-09-17T07:43:10.865101Z"
    }
   },
   "cell_type": "code",
   "source": [
    "\n",
    "train_dataset = generate_qa_embedding_pairs(\n",
    "    llm=llm,\n",
    "    nodes=train_nodes,\n",
    "    output_path=\"train_dataset.json\",\n",
    ")\n",
    "val_dataset = generate_qa_embedding_pairs(\n",
    "    llm=llm,\n",
    "    nodes=val_nodes,\n",
    "    output_path=\"val_dataset.json\",\n",
    ")"
   ],
   "id": "ad4e912f786577e7",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/8 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 12%|█▎        | 1/8 [00:09<01:03,  9.11s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 25%|██▌       | 2/8 [00:17<00:52,  8.70s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 38%|███▊      | 3/8 [00:31<00:54, 10.98s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 50%|█████     | 4/8 [00:37<00:36,  9.21s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 62%|██████▎   | 5/8 [00:45<00:25,  8.62s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 75%|███████▌  | 6/8 [00:51<00:15,  7.97s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 88%|████████▊ | 7/8 [00:58<00:07,  7.54s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 8/8 [01:04<00:00,  8.08s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Final dataset saved.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/56 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  2%|▏         | 1/56 [00:06<05:45,  6.28s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  4%|▎         | 2/56 [00:12<05:34,  6.19s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  5%|▌         | 3/56 [00:18<05:36,  6.36s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  7%|▋         | 4/56 [00:26<05:57,  6.87s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  9%|▉         | 5/56 [00:31<05:19,  6.26s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 11%|█         | 6/56 [00:37<04:56,  5.93s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 12%|█▎        | 7/56 [00:43<04:51,  5.95s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 14%|█▍        | 8/56 [00:51<05:27,  6.83s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▌        | 9/56 [00:57<05:06,  6.53s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 18%|█▊        | 10/56 [01:03<04:56,  6.44s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 20%|█▉        | 11/56 [01:10<04:54,  6.53s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██▏       | 12/56 [01:17<04:57,  6.75s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 23%|██▎       | 13/56 [01:24<04:46,  6.66s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 25%|██▌       | 14/56 [01:32<04:52,  6.98s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 27%|██▋       | 15/56 [01:40<04:58,  7.29s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 29%|██▊       | 16/56 [01:45<04:33,  6.83s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 30%|███       | 17/56 [01:52<04:27,  6.85s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 32%|███▏      | 18/56 [01:58<04:09,  6.55s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 34%|███▍      | 19/56 [02:03<03:44,  6.07s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 36%|███▌      | 20/56 [02:09<03:32,  5.90s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 38%|███▊      | 21/56 [02:16<03:39,  6.26s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 39%|███▉      | 22/56 [02:22<03:38,  6.42s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 41%|████      | 23/56 [02:29<03:31,  6.42s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 43%|████▎     | 24/56 [02:36<03:36,  6.77s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 45%|████▍     | 25/56 [02:44<03:35,  6.95s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 46%|████▋     | 26/56 [02:49<03:13,  6.44s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 48%|████▊     | 27/56 [02:55<02:59,  6.19s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 50%|█████     | 28/56 [03:01<02:54,  6.22s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 29/56 [03:07<02:44,  6.10s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 54%|█████▎    | 30/56 [03:12<02:32,  5.87s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 55%|█████▌    | 31/56 [03:20<02:39,  6.38s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 57%|█████▋    | 32/56 [03:26<02:29,  6.22s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 59%|█████▉    | 33/56 [03:32<02:25,  6.32s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 61%|██████    | 34/56 [03:41<02:36,  7.13s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 62%|██████▎   | 35/56 [03:47<02:23,  6.82s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 64%|██████▍   | 36/56 [03:53<02:12,  6.64s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 66%|██████▌   | 37/56 [03:59<01:58,  6.21s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 68%|██████▊   | 38/56 [04:04<01:48,  6.02s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 70%|██████▉   | 39/56 [04:11<01:44,  6.14s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 71%|███████▏  | 40/56 [04:17<01:38,  6.17s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 73%|███████▎  | 41/56 [04:23<01:32,  6.13s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 75%|███████▌  | 42/56 [04:32<01:36,  6.92s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 77%|███████▋  | 43/56 [04:38<01:26,  6.68s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 79%|███████▊  | 44/56 [04:44<01:19,  6.61s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 80%|████████  | 45/56 [04:52<01:15,  6.89s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 46/56 [05:00<01:13,  7.34s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 47/56 [05:07<01:03,  7.10s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 86%|████████▌ | 48/56 [05:11<00:51,  6.40s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 88%|████████▊ | 49/56 [05:17<00:42,  6.03s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 89%|████████▉ | 50/56 [05:22<00:35,  5.96s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 91%|█████████ | 51/56 [05:30<00:32,  6.42s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 52/56 [05:36<00:25,  6.27s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 53/56 [05:43<00:19,  6.55s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 96%|█████████▋| 54/56 [05:48<00:12,  6.06s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 55/56 [05:53<00:05,  5.75s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.deepseek.com/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 56/56 [06:00<00:00,  6.43s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Final dataset saved.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:50:55.391073Z",
     "start_time": "2025-09-17T07:50:54.210167Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from llama_index.finetuning import SentenceTransformersFinetuneEngine\n",
    "finetune_engine = SentenceTransformersFinetuneEngine(\n",
    "    train_dataset,\n",
    "    model_id=r\"D:/pythonProject17/transformers/model_em/BAAI/bge-small-en-v1.5\",\n",
    "    model_output_path=\"test_model\",\n",
    "    val_dataset=val_dataset,\n",
    ")"
   ],
   "id": "bfad42ac713c765f",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda:0\n",
      "Use pytorch device_name: cuda:0\n",
      "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: D:/pythonProject17/transformers/model_em/BAAI/bge-small-en-v1.5\n",
      "Load pretrained SentenceTransformer: D:/pythonProject17/transformers/model_em/BAAI/bge-small-en-v1.5\n"
     ]
    }
   ],
   "execution_count": 13
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:52:51.014049Z",
     "start_time": "2025-09-17T07:52:37.416294Z"
    }
   },
   "cell_type": "code",
   "source": "finetune_engine.finetune()",
   "id": "456359b58ae4a405",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]"
      ],
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "e83255ed025f4683ac206ba88b8f8d3c"
      }
     },
     "metadata": {},
     "output_type": "display_data",
     "jetTransient": {
      "display_id": null
     }
    },
    {
     "data": {
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ],
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='4' max='4' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [4/4 00:06, Epoch 2/2]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Cosine Accuracy@1</th>\n",
       "      <th>Cosine Accuracy@3</th>\n",
       "      <th>Cosine Accuracy@5</th>\n",
       "      <th>Cosine Accuracy@10</th>\n",
       "      <th>Cosine Precision@1</th>\n",
       "      <th>Cosine Precision@3</th>\n",
       "      <th>Cosine Precision@5</th>\n",
       "      <th>Cosine Precision@10</th>\n",
       "      <th>Cosine Recall@1</th>\n",
       "      <th>Cosine Recall@3</th>\n",
       "      <th>Cosine Recall@5</th>\n",
       "      <th>Cosine Recall@10</th>\n",
       "      <th>Cosine Ndcg@10</th>\n",
       "      <th>Cosine Mrr@10</th>\n",
       "      <th>Cosine Map@100</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>No log</td>\n",
       "      <td>No log</td>\n",
       "      <td>0.312500</td>\n",
       "      <td>0.437500</td>\n",
       "      <td>0.517857</td>\n",
       "      <td>0.598214</td>\n",
       "      <td>0.312500</td>\n",
       "      <td>0.145833</td>\n",
       "      <td>0.103571</td>\n",
       "      <td>0.059821</td>\n",
       "      <td>0.312500</td>\n",
       "      <td>0.437500</td>\n",
       "      <td>0.517857</td>\n",
       "      <td>0.598214</td>\n",
       "      <td>0.443300</td>\n",
       "      <td>0.395100</td>\n",
       "      <td>0.412713</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>No log</td>\n",
       "      <td>No log</td>\n",
       "      <td>0.312500</td>\n",
       "      <td>0.437500</td>\n",
       "      <td>0.517857</td>\n",
       "      <td>0.598214</td>\n",
       "      <td>0.312500</td>\n",
       "      <td>0.145833</td>\n",
       "      <td>0.103571</td>\n",
       "      <td>0.059821</td>\n",
       "      <td>0.312500</td>\n",
       "      <td>0.437500</td>\n",
       "      <td>0.517857</td>\n",
       "      <td>0.598214</td>\n",
       "      <td>0.443300</td>\n",
       "      <td>0.395100</td>\n",
       "      <td>0.412713</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ]
     },
     "metadata": {},
     "output_type": "display_data",
     "jetTransient": {
      "display_id": "adefab03ddfd916463258ca071872f51"
     }
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Information Retrieval Evaluation of the model on the  dataset in epoch 1.0 after 2 steps:\n",
      "Information Retrieval Evaluation of the model on the  dataset in epoch 1.0 after 2 steps:\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Queries: 112\n",
      "Queries: 112\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Corpus: 56\n",
      "\n",
      "Corpus: 56\n",
      "\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Score-Function: cosine\n",
      "Score-Function: cosine\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@1: 31.25%\n",
      "Accuracy@1: 31.25%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@3: 43.75%\n",
      "Accuracy@3: 43.75%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@5: 51.79%\n",
      "Accuracy@5: 51.79%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@10: 59.82%\n",
      "Accuracy@10: 59.82%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Precision@1: 31.25%\n",
      "Precision@1: 31.25%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Precision@3: 14.58%\n",
      "Precision@3: 14.58%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Precision@5: 10.36%\n",
      "Precision@5: 10.36%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Precision@10: 5.98%\n",
      "Precision@10: 5.98%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Recall@1: 31.25%\n",
      "Recall@1: 31.25%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Recall@3: 43.75%\n",
      "Recall@3: 43.75%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Recall@5: 51.79%\n",
      "Recall@5: 51.79%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Recall@10: 59.82%\n",
      "Recall@10: 59.82%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:MRR@10: 0.3951\n",
      "MRR@10: 0.3951\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:NDCG@10: 0.4433\n",
      "NDCG@10: 0.4433\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:MAP@100: 0.4127\n",
      "MAP@100: 0.4127\n",
      "INFO:sentence_transformers.SentenceTransformer:Save model to test_model\n",
      "Save model to test_model\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Information Retrieval Evaluation of the model on the  dataset in epoch 2.0 after 4 steps:\n",
      "Information Retrieval Evaluation of the model on the  dataset in epoch 2.0 after 4 steps:\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Queries: 112\n",
      "Queries: 112\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Corpus: 56\n",
      "\n",
      "Corpus: 56\n",
      "\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Score-Function: cosine\n",
      "Score-Function: cosine\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@1: 31.25%\n",
      "Accuracy@1: 31.25%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@3: 43.75%\n",
      "Accuracy@3: 43.75%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@5: 51.79%\n",
      "Accuracy@5: 51.79%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@10: 59.82%\n",
      "Accuracy@10: 59.82%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Precision@1: 31.25%\n",
      "Precision@1: 31.25%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Precision@3: 14.58%\n",
      "Precision@3: 14.58%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Precision@5: 10.36%\n",
      "Precision@5: 10.36%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Precision@10: 5.98%\n",
      "Precision@10: 5.98%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Recall@1: 31.25%\n",
      "Recall@1: 31.25%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Recall@3: 43.75%\n",
      "Recall@3: 43.75%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Recall@5: 51.79%\n",
      "Recall@5: 51.79%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Recall@10: 59.82%\n",
      "Recall@10: 59.82%\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:MRR@10: 0.3951\n",
      "MRR@10: 0.3951\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:NDCG@10: 0.4433\n",
      "NDCG@10: 0.4433\n",
      "INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:MAP@100: 0.4127\n",
      "MAP@100: 0.4127\n"
     ]
    }
   ],
   "execution_count": 14
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:52:57.948186Z",
     "start_time": "2025-09-17T07:52:56.988601Z"
    }
   },
   "cell_type": "code",
   "source": "embed_model = finetune_engine.get_finetuned_model()",
   "id": "64fe4e4ad217005a",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: test_model\n",
      "Load pretrained SentenceTransformer: test_model\n"
     ]
    }
   ],
   "execution_count": 15
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:53:07.795147Z",
     "start_time": "2025-09-17T07:53:07.774143Z"
    }
   },
   "cell_type": "code",
   "source": "embed_model",
   "id": "6b43e6d58426d1a",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "HuggingFaceEmbedding(model_name='test_model', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x0000022B8AE72290>, num_workers=None, embeddings_cache=None, max_length=512, normalize=True, query_instruction=None, text_instruction=None, cache_folder=None, show_progress_bar=False)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 16
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-17T07:50:15.892537Z",
     "start_time": "2025-09-17T07:50:15.861544Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from tqdm.notebook import tqdm\n",
    "from llama_index.core import VectorStoreIndex\n",
    "from llama_index.core.schema import TextNode\n",
    "\n",
    "\n",
    "def evaluate(\n",
    "    dataset,\n",
    "    embed_model,\n",
    "    top_k=5,\n",
    "    verbose=False,\n",
    "):\n",
    "    corpus = dataset.corpus\n",
    "    queries = dataset.queries\n",
    "    relevant_docs = dataset.relevant_docs\n",
    "\n",
    "    nodes = [TextNode(id_=id_, text=text) for id_, text in corpus.items()]\n",
    "    index = VectorStoreIndex(\n",
    "        nodes, embed_model=embed_model, show_progress=True\n",
    "    )\n",
    "    retriever = index.as_retriever(similarity_top_k=top_k)\n",
    "\n",
    "    eval_results = []\n",
    "    for query_id, query in tqdm(queries.items()):\n",
    "        retrieved_nodes = retriever.retrieve(query)\n",
    "        retrieved_ids = [node.node.node_id for node in retrieved_nodes]\n",
    "        expected_id = relevant_docs[query_id][0]\n",
    "        is_hit = expected_id in retrieved_ids  # assume 1 relevant doc\n",
    "\n",
    "        eval_result = {\n",
    "            \"is_hit\": is_hit,\n",
    "            \"retrieved\": retrieved_ids,\n",
    "            \"expected\": expected_id,\n",
    "            \"query\": query_id,\n",
    "        }\n",
    "        eval_results.append(eval_result)\n",
    "    return eval_results"
   ],
   "id": "c84b4fb1c7004ba7",
   "outputs": [],
   "execution_count": 12
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "",
   "id": "8fc4cc212e932f0"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
